# ============================== SETUP ===============================
# Start from an empty global environment so objects left over from an
# earlier session cannot leak into this run.
rm(list = ls())
# Heavily penalise scientific notation so numbers print in fixed notation.
options(scipen = 999)
# Every relative path used below (e.g. "../datasets/input/") is resolved
# from this directory.
# NOTE(review): the path is machine-specific; adjust it, or launch R from
# the replication "codes" folder, when running elsewhere.
setwd("~/Dropbox/Wayne-Ying/White_Nationalist_Recruitment/replication/codes")
library(tidyverse)
library(data.table)
library(dplyr)
library(ggplot2)
library(zoo)
library(fixest)
library(texreg)
# ========================= DATA WRANGLING ==========================
# Read one platform's post file and apply the shared sample restrictions:
#   1. keep posts with RelevantLogit == 1,
#   2. drop posts flagged by the ads dictionary (ads_dict == 0 is kept),
#   3. derive date / month / year from created_at,
#   4. keep posts dated inside [window_start, window_end] (inclusive).
# Returns the filtered table with the three derived columns added.
load_posts <- function(path,
                       window_start = as.Date("2016-08-15"),
                       window_end   = as.Date("2021-05-15")) {
  post_dt <- fread(path, stringsAsFactors = FALSE)
  post_dt <- post_dt[post_dt$RelevantLogit == 1, ]
  post_dt <- post_dt[post_dt$ads_dict == 0, ]

  post_dt$date  <- as.Date(post_dt$created_at)
  post_dt$month <- as.yearmon(post_dt$date)
  post_dt$year  <- lubridate::year(post_dt$date)

  in_window <- post_dt$date >= window_start & post_dt$date <= window_end
  post_dt[in_window, ]
}

tweets <- load_posts("../datasets/input/tweets.csv")
gabs   <- load_posts("../datasets/input/gabs.csv")
# ====================== THEME CODING (TWO-KEYWORD) =================
# Each *_dict column arrives as a count; it is overwritten with a 0/1
# indicator that is 1 only when the count is at least 2.
main_themes <- c(
  "race_dict", "gender_dict", "nationalism_dict",
  "partisan_dict", "religion_dict"
)
gender_subthemes <- c(
  "Benevolent_dict", "Feminism_dict", "GenderIdentification_dict",
  "General_dict", "Hostile_dict", "ReproductiveRights_dict",
  "SexualOrientation_dict"
)

for (theme_col in c(main_themes, gender_subthemes)) {
  tweets[[theme_col]] <- ifelse(tweets[[theme_col]] >= 2, 1, 0)
  gabs[[theme_col]]   <- ifelse(gabs[[theme_col]] >= 2, 1, 0)
}

# Number of main themes (0-5) flagged on each post; the gender
# subcategories are not part of this count.
tweets$n_themes <- Reduce(
  `+`,
  lapply(main_themes, function(theme_col) tweets[[theme_col]])
)
gabs$n_themes <- Reduce(
  `+`,
  lapply(main_themes, function(theme_col) gabs[[theme_col]])
)
table(tweets$n_themes)
table(gabs$n_themes)

# Total engagement per post: reposts + replies + likes, using each
# platform's own column names.
tweets$reaction <- with(tweets, rts + replys + likes)
gabs$reaction   <- with(gabs, reblogs_count + replies_count + favourites_count)

# seed = 1 when seedfollow carries the code 999, otherwise 0.
tweets$seed <- ifelse(tweets[["seedfollow"]] == 999, 1, 0)
gabs$seed   <- ifelse(gabs[["seedfollow"]] == 999, 1, 0)
# ========================== SUBCATEGORIES OF GENDER =========================
# The seven gender subcategory indicator columns, in table order.
gender_subcats <- c(
  "Benevolent_dict", "Feminism_dict", "GenderIdentification_dict",
  "General_dict", "Hostile_dict", "ReproductiveRights_dict",
  "SexualOrientation_dict"
)

# One-row table holding, for each subcategory, the number of flagged posts
# (missing values are ignored).
count_subcat_hits <- function(post_dt) {
  post_dt %>%
    summarise(across(all_of(gender_subcats), ~ sum(.x, na.rm = TRUE)))
}

# A) Twitter: raw counts, their grand total, and each count as a fraction
#    of that total, with the platform label as the first column.
twitter_subcat_counts <- count_subcat_hits(tweets)
twitter_total_hits    <- rowSums(twitter_subcat_counts)
twitter_shares <- (twitter_subcat_counts / twitter_total_hits) %>%
  mutate(platform = "Twitter", .before = 1)

# B) Gab: same computation.
gab_subcat_counts <- count_subcat_hits(gabs)
gab_total_hits    <- rowSums(gab_subcat_counts)
gab_shares <- (gab_subcat_counts / gab_total_hits) %>%
  mutate(platform = "Gab", .before = 1)

# C) Stack the two platforms into a 2-row data frame.
subcat_share_table <- bind_rows(twitter_shares, gab_shares)
# kable() comes from knitr, so the package has to be attached before the
# call; calling kable() first stops a fresh session with
# "could not find function". The table is emitted once.
library(knitr)
# Print the 2-row share table to LaTeX with 5 decimal places
kable(
  subcat_share_table,
  format = "latex",
  digits = 5,
  booktabs = TRUE,
  caption = "Distribution of subcategory hits among all gender subcategories (sums to 1 per platform)."
)
# Work on a copy of the tweets so the Twitter-specific column names can be
# replaced with the names Gab uses for the same quantities.
tweets2 <- tweets
#colnames(tweets2)[colnames(tweets2)=="twitter_handle"]  <- "username"
twitter_to_gab <- c(
  rts            = "reblogs_count",
  replys         = "replies_count",
  likes          = "favourites_count",
  follower_count = "followers_count"
)
for (old_name in names(twitter_to_gab)) {
  colnames(tweets2)[colnames(tweets2) == old_name] <- twitter_to_gab[[old_name]]
}

tweets2$platform <- "twitter"
gabs$platform    <- "gab"

# Show which of the (renamed) tweet columns also exist in the Gab table.
tweet_cols <- colnames(tweets2)
tweet_cols[tweet_cols %in% colnames(gabs)]

# Columns kept from both platforms, in the order used for the stacked table.
stack_cols <- c(
  "created_at", "username", "reblogs_count", "replies_count",
  "favourites_count", "seedfollow", "RelevantLogit", "ads_dict", "ntoken",
  "gender_dict", "nationalism_dict", "partisan_dict", "race_dict",
  "religion_dict", "n_themes", "reaction", "year", "date", "month", "seed",
  "platform", "followers_count", "overlap"
)
# with = FALSE makes data.table treat stack_cols as column names.
gabtweet <- rbind(
  tweets2[, stack_cols, with = FALSE],
  gabs[, stack_cols, with = FALSE]
)

# Log-transformed outcomes (+1 so zero counts stay defined) and post length
# in units of 10 tokens.
gabtweet$repost_log   <- log(gabtweet$reblogs_count + 1)
gabtweet$like_log     <- log(gabtweet$favourites_count + 1)
gabtweet$reaction_log <- log(gabtweet$reaction + 1)
gabtweet$ntoken10     <- gabtweet$ntoken / 10
###### Table 3: Leader Posts Garner Higher Engagement than Non-Leader Posts
# Platform-specific samples used by the single-platform models.
twitter_posts <- gabtweet[gabtweet$platform == "twitter", ]
gab_posts     <- gabtweet[gabtweet$platform == "gab", ]

# Outcome: log(likes + 1). The pooled model absorbs date and platform fixed
# effects; the single-platform models absorb date fixed effects only.
leaderall_l <- feols(
  like_log ~ seed + ntoken10 + log(followers_count + 1) | date + platform,
  data = gabtweet
)
leadert_l <- feols(
  like_log ~ seed + ntoken10 + log(followers_count + 1) | date,
  data = twitter_posts
)
leaderg_l <- feols(
  like_log ~ seed + ntoken10 + log(followers_count + 1) | date,
  data = gab_posts
)

# Outcome: log(reposts + 1), same three specifications.
leaderall_r <- feols(
  repost_log ~ seed + ntoken10 + log(followers_count + 1) | date + platform,
  data = gabtweet
)
leadert_r <- feols(
  repost_log ~ seed + ntoken10 + log(followers_count + 1) | date,
  data = twitter_posts
)
leaderg_r <- feols(
  repost_log ~ seed + ntoken10 + log(followers_count + 1) | date,
  data = gab_posts
)

# Standard errors clustered by username; a single star marks p < 0.05.
leader_summaries <- list(
  summary(leaderall_l, cluster = ~username),
  summary(leadert_l, cluster = ~username),
  summary(leaderg_l, cluster = ~username),
  summary(leaderall_r, cluster = ~username),
  summary(leadert_r, cluster = ~username),
  summary(leaderg_r, cluster = ~username)
)
texreg(leader_summaries, stars = 0.05)
# ============================== SETUP ===============================
# Wipe every object created so far; the code from here on starts again from
# an empty global environment.
rm(list = ls())
# Heavily penalise scientific notation so numbers print in fixed notation.
options(scipen = 999)
# Relative paths below are resolved from this directory.
# NOTE(review): the path is machine-specific; adjust it when running
# elsewhere.
setwd("~/Dropbox/Wayne-Ying/White_Nationalist_Recruitment/replication/codes")
library(tidyverse)
library(data.table)
library(dplyr)
library(ggplot2)
library(zoo)
library(fixest)
library(knitr)
library(texreg)
# ========================= DATA WRANGLING ==========================
# Read the raw tweet file again (no filters applied at this point).
tweets <- fread("../datasets/input/tweets.csv", stringsAsFactors = FALSE)
# Preview the first rows to check the import.
head(tweets)
